Support vector machine (SVM) calibration experiment


In [6]:
from sklearn import grid_search
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from time import time

import numpy as np
import pandas as pd
import sys

SVM calibration

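This cell defines the feature subsets to evaluate ('csv_file_features') and the SVM hyper-parameter values to search over. For each feature subset, a grid search is run and the fitted search object (which holds the best classifier found) is appended to 'estimators_array', in the same order as 'csv_file_features'.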


In [10]:
#Grid-search an RBF SVM for each candidate feature subset of the training dataset
# The __main__ guard wraps the whole experiment, which runs the grid search
# with n_jobs=6 (parallel workers).
if __name__ == '__main__':

    # Each inner list is one feature subset, given as positional column
    # indices into the training CSV.
    csv_file_features = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], 
                         [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13]]

    # One fitted GridSearchCV per feature subset, in the same order as
    # csv_file_features.
    estimators_array = []

    # NOTE: the first reported value is gscv.best_score_, i.e. the mean
    # cross-validated score of the best parameter combination.
    print('Order of results')
    print('Training score, Test score, Time, Features, SVM parameters\n')

    C_range = [0.01, 0.1, 1, 10, 100]
    gamma_range = [0.01, 0.1, 1, 10, 100]

    # The search space is identical for every feature subset, so build it once.
    param_grid = [{'C': C_range, 
                   'gamma': gamma_range, 
                   'kernel': ['rbf'], 
                   'decision_function_shape': ['ovr'], 
                   'random_state': [0]}]

    # The dataset does not depend on the feature subset: read it a single time
    # instead of re-parsing the CSV on every iteration.
    df = pd.read_csv("../../Dataset/Train/EEG_Train_Sorted.csv")
    y = df['Class']

    #Test all the feature sets
    for i in csv_file_features:
        stdsc = StandardScaler()

        #Separate the selected feature columns from the class labels
        # .iloc makes the positional selection explicit (the original .ix is
        # ambiguous between labels and positions and is deprecated).
        # NOTE(review): assumes none of the listed positions is the 'Class'
        # column itself - confirm against the CSV layout.
        X = df.iloc[:, i]

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

        #Standardize data: fit the scaler on the training split only, then
        #apply the same transformation to the held-out split
        X_train_std = stdsc.fit_transform(X_train)
        X_test_std = stdsc.transform(X_test)

        #Create SVM
        svm = SVC()

        #Set Grid Params (the timer also covers the search set-up and, below,
        #the scoring on the held-out split)
        init = time()
        gscv = grid_search.GridSearchCV(svm, 
                                       param_grid, 
                                       n_jobs=6, 
                                       cv=StratifiedShuffleSplit(y=y_train,  
                                                                 n_iter=5, 
                                                                 test_size=0.4, 
                                                                 random_state=0))
        #Run the grid search
        gscv.fit(X_train_std, y_train)

        #Save our estimator
        # (A leftover sys.exit() debugging call used to abort the cell right
        # here, so nothing was ever stored or reported.)
        estimators_array.append(gscv)

        #Report testing results
        print('{:.5f}, {:.5f}, {:.5f}s, {}, {}'.format(gscv.best_score_, gscv.score(X_test_std, y_test), time() - init, i, gscv.best_params_))


Order of results
Training score, Test score, Time, Features, SVM parameters

C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\hashing.py:197: DeprecationWarning: Changing the shape of non-C contiguous array by
descriptor assignment is deprecated. To maintain
the Fortran contiguity of a multidimensional Fortran
array, use 'a.T.view(...).T' instead
  obj_bytes_view = obj.view(self.np.uint8)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\pool.py:436: UserWarning: Failed to clean temporary folder: C:\Users\c_am_\AppData\Local\Temp\joblib_memmaping_pool_7456_1997645054584
  warnings.warn("Failed to clean temporary folder: %s" % folder_path)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in retrieve(self)
    726             try:
--> 727                 self._output.extend(job.get())
    728             except tuple(self.exceptions) as exception:

C:\Anaconda3\lib\multiprocessing\pool.py in get(self, timeout)
    601     def get(self, timeout=None):
--> 602         self.wait(timeout)
    603         if not self.ready():

C:\Anaconda3\lib\multiprocessing\pool.py in wait(self, timeout)
    598     def wait(self, timeout=None):
--> 599         self._event.wait(timeout)
    600 

C:\Anaconda3\lib\threading.py in wait(self, timeout)
    548             if not signaled:
--> 549                 signaled = self._cond.wait(timeout)
    550             return signaled

C:\Anaconda3\lib\threading.py in wait(self, timeout)
    292             if timeout is None:
--> 293                 waiter.acquire()
    294                 gotit = True

KeyboardInterrupt: 

During handling of the above exception, another exception occurred:

KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-10-65307e56070b> in <module>()
     46                                                                  random_state=0))
     47         #Test classifier
---> 48         gscv.fit(X_train_std, y_train)
     49         sys.exit("Error message")
     50         #Save our estimator

C:\Anaconda3\lib\site-packages\sklearn\grid_search.py in fit(self, X, y)
    802 
    803         """
--> 804         return self._fit(X, y, ParameterGrid(self.param_grid))
    805 
    806 

C:\Anaconda3\lib\site-packages\sklearn\grid_search.py in _fit(self, X, y, parameter_iterable)
    551                                     self.fit_params, return_parameters=True,
    552                                     error_score=self.error_score)
--> 553                 for parameters in parameter_iterable
    554                 for train, test in cv)
    555 

C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    808                 # consumption.
    809                 self._iterating = False
--> 810             self.retrieve()
    811             # Make sure that we get a last message telling us we are done
    812             elapsed_time = time.time() - self._start_time

C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in retrieve(self)
    748                 # the results as we will raise the exception we got back
    749                 # to the caller instead of returning any result.
--> 750                 self._terminate_pool()
    751                 if self._managed_pool:
    752                     # In case we had to terminate a managed pool, let

C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _terminate_pool(self)
    547         if self._pool is not None:
    548             self._pool.close()
--> 549             self._pool.terminate()  # terminate does a join()
    550             self._pool = None
    551             if self.backend == 'multiprocessing':

C:\Anaconda3\lib\site-packages\sklearn\externals\joblib\pool.py in terminate(self)
    581 
    582     def terminate(self):
--> 583         super(MemmapingPool, self).terminate()
    584         delete_folder(self._temp_folder)

C:\Anaconda3\lib\multiprocessing\pool.py in terminate(self)
    503         self._state = TERMINATE
    504         self._worker_handler._state = TERMINATE
--> 505         self._terminate()
    506 
    507     def join(self):

C:\Anaconda3\lib\multiprocessing\util.py in __call__(self, wr, _finalizer_registry, sub_debug, getpid)
    183                 sub_debug('finalizer calling %s with args %s and kwargs %s',
    184                           self._callback, self._args, self._kwargs)
--> 185                 res = self._callback(*self._args, **self._kwargs)
    186             self._weakref = self._callback = self._args = \
    187                             self._kwargs = self._key = None

C:\Anaconda3\lib\multiprocessing\pool.py in _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, worker_handler, task_handler, result_handler, cache)
    544         util.debug('joining worker handler')
    545         if threading.current_thread() is not worker_handler:
--> 546             worker_handler.join()
    547 
    548         # Terminate workers which haven't already finished.

C:\Anaconda3\lib\threading.py in join(self, timeout)
   1052 
   1053         if timeout is None:
-> 1054             self._wait_for_tstate_lock()
   1055         else:
   1056             # the behavior of a negative timeout isn't documented, but

C:\Anaconda3\lib\threading.py in _wait_for_tstate_lock(self, block, timeout)
   1068         if lock is None:  # already determined that the C code is done
   1069             assert self._is_stopped
-> 1070         elif lock.acquire(block, timeout):
   1071             lock.release()
   1072             self._stop()

KeyboardInterrupt: 

Now we check the parameters of the best estimator


In [ ]:
#Get best estimator parameters for the first feature set
# best_estimator_ is the SVC refit with the winning hyper-parameters; printing
# the GridSearchCV wrapper itself would only show the search configuration and
# the untuned base estimator.
print(estimators_array[0].best_estimator_)